import requests
import time
from lxml import etree
import csv

# Scrape the enforcement listing on bjcourt.gov.cn page by page and append the
# extracted table rows to a CSV file.

# Request headers sent with every page fetch: a desktop-browser User-Agent and
# a referer that points at the listing itself.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 Safari/537.36',
    'referer': 'https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=26&page=2'
}

# Listing URL; only the trailing ``page`` query parameter varies.
PAGE_URL = 'https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=26&page={page}'
FIRST_PAGE = 1
LAST_PAGE = 438

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

CSV_PATH = '法院数据.csv'
CSV_HEADER = ['序号', '姓名', '身份证号', '证件类型', '执行法院', '执行依据', '执行时间']
FIELDS_PER_ROW = len(CSV_HEADER)

# Deletes the CR / TAB / LF characters that surround the name cell's text.
_NAME_NOISE = str.maketrans('', '', '\r\t\n')

# Every row scraped during this run, in page order.
data = []
for i in range(FIRST_PAGE, LAST_PAGE + 1):
    url = PAGE_URL.format(page=i)
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as if it were
    # a (seemingly empty) listing.
    response.raise_for_status()
    res = response.content.decode()
    tree = etree.HTML(res)
    items = tree.xpath('//table[@class="table_list_02"]')
    print(f'正在爬取第{i}页')

    # Rows found on this page only, so the file write below never repeats
    # rows that belong to earlier pages.
    page_rows = []
    for item in items:
        # Walk every row after the first one (the first row is skipped by the
        # XPath predicate) so that each record is captured, not just the
        # first record of the table.
        for row in item.xpath('./tr[position()>1]'):
            cells = row.xpath('./td/text()')
            if len(cells) < FIELDS_PER_ROW:
                # Not a full record (too few text cells); skip it rather than
                # crash with IndexError.
                continue
            fy_id, name, sf, zj, zx, fy, time1 = cells[:FIELDS_PER_ROW]
            page_rows.append(
                [fy_id, name.translate(_NAME_NOISE), sf, zj, zx, fy, time1]
            )
    data.extend(page_rows)
    print(f'第{i}页爬取完成')
    time.sleep(1)  # be polite to the server between page fetches

    # newline='' is required by the csv module; without it Windows output
    # gets a blank line after every row.
    with open(CSV_PATH, 'a', encoding='utf-8', newline='') as f:
        write = csv.writer(f)
        # In append mode the stream starts at end-of-file, so position 0
        # means the file is new or empty: emit the header exactly once.
        if f.tell() == 0:
            write.writerow(CSV_HEADER)
        write.writerows(page_rows)
    # NOTE(review): this break stops the crawl after the first page even
    # though the loop is set up for pages 1-438. It looks like a debugging
    # leftover; remove it to crawl every page (the per-page write above is
    # already safe for that).
    break

print(f'数据存储完成{len(data)}条数据')